{
  "cells": [
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "qtRwI4XE1tds"
      },
      "outputs": [],
      "source": [
        "import pandas as pd\n",
        "import matplotlib.pyplot as plt\n",
        "import numpy as np\n",
        "import itertools\n",
        "import tqdm\n",
        "\n",
        "import results_analysis_utils\n",
        "import setup_stability_score"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "0rVjVGXu3bCH"
      },
      "outputs": [],
      "source": [
        "# Function for computing all 4 evaluation measures described in the paper\n",
        "\n",
        "def get_evaluation_measure(transferability_scores, actual_performances, transferability_metric, target_dataset, sources_id):\n",
        "  \"\"\"Computes the four evaluation measures for one metric / target / source-set setup.\n",
        "\n",
        "  Args:\n",
        "    transferability_scores: Transferability scores, one per source.\n",
        "    actual_performances: Measured performances, aligned with the scores.\n",
        "    transferability_metric: Name of the metric that produced the scores.\n",
        "    target_dataset: Name of the target dataset.\n",
        "    sources_id: Identifier of the source set the scores belong to.\n",
        "\n",
        "  Returns:\n",
        "    A list of four rows, each of the form\n",
        "    [transferability_metric, target_dataset, score, measure_name, sources_id],\n",
        "    in the order 'w-kendall', 'kendall', 'pearson', 'rel'.\n",
        "  \"\"\"\n",
        "  # The correlation-based measures share one helper and differ only in the\n",
        "  # name passed to it; that name is also used as the row label.\n",
        "  measure_values = [\n",
        "      (measure_name, results_analysis_utils.get_correlation_scores(\n",
        "          actual_performances, transferability_scores, measure_name))\n",
        "      for measure_name in ('w-kendall', 'kendall', 'pearson')\n",
        "  ]\n",
        "  measure_values.append(\n",
        "      ('rel', results_analysis_utils.relative_top_accuracy(\n",
        "          actual_performances, transferability_scores)))\n",
        "\n",
        "  return [\n",
        "      [transferability_metric, target_dataset, float(value), measure_name, sources_id]\n",
        "      for measure_name, value in measure_values\n",
        "  ]"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "8_uCALWU6q-N"
      },
      "outputs": [],
      "source": [
        "# Function for generating and evaluating experiments by subsampling over available resources\n",
        "\n",
        "def evaluate_subsampled_experiments(data_df: pd.DataFrame, sampling_ratio: float, scenario: str, frozen_backbone: bool=False) -> pd.DataFrame:\n",
        "  \"\"\"Evaluates every subsampled experiment that keeps sampling_ratio of the sources.\n",
        "\n",
        "  All combinations of int(sampling_ratio * n) sources out of the n distinct\n",
        "  ones are enumerated. For each transferability metric, target dataset and\n",
        "  combination, the evaluation measures are computed with\n",
        "  get_evaluation_measure.\n",
        "\n",
        "  Args:\n",
        "    data_df: Results table. Must contain the columns 'Transferability Metric',\n",
        "      'Target Dataset' and 'Transferability Metric Score', plus the sampled\n",
        "      column ('Source Architecture' for classification, 'Source Dataset'\n",
        "      otherwise) and the performance column ('Test Accuracy' for\n",
        "      classification, 'Test Mean IoU' otherwise). Classification also needs\n",
        "      'Trainable Backbone'.\n",
        "    sampling_ratio: Fraction of the source pool kept in each combination.\n",
        "    scenario: Either 'classification' or 'semantic segmentation'.\n",
        "    frozen_backbone: Value matched against the 'Trainable Backbone' column.\n",
        "      Only consulted when scenario is 'classification'.\n",
        "\n",
        "  Returns:\n",
        "    A DataFrame with the columns 'Transferability Metric', 'Target Dataset',\n",
        "    'Evaluation Measure Score', 'Evaluation Measure' and 'Sources ID', where\n",
        "    'Sources ID' is the index of the source combination.\n",
        "\n",
        "  Raises:\n",
        "    AssertionError: If scenario is not one of the two supported values.\n",
        "    KeyError: If a sampled source has no row for some (metric, target) pair.\n",
        "  \"\"\"\n",
        "\n",
        "  assert scenario in ['classification', 'semantic segmentation']\n",
        "  sampling_what = 'Source Architecture' if scenario == 'classification' else 'Source Dataset'\n",
        "  actual_performance = 'Test Accuracy' if scenario == 'classification' else 'Test Mean IoU'\n",
        "\n",
        "  if scenario == 'classification':  # Filter based on backbone choice\n",
        "    # NOTE(review): rows are kept where 'Trainable Backbone' equals\n",
        "    # frozen_backbone; going by the names one would expect the negation.\n",
        "    # Left unchanged because it alters which rows are analysed - confirm.\n",
        "    data_df = data_df[data_df['Trainable Backbone'] == frozen_backbone]\n",
        "\n",
        "  source_pool = data_df[sampling_what].unique()\n",
        "  num_sources2sample = int(sampling_ratio * len(source_pool))\n",
        "  source_combinations = list(itertools.combinations(source_pool, num_sources2sample))\n",
        "\n",
        "  transferability_metrics = data_df['Transferability Metric'].unique()\n",
        "  target_datasets = data_df['Target Dataset'].unique()\n",
        "  evaluation_results = []\n",
        "\n",
        "  for transf_metric in transferability_metrics:\n",
        "    data_df_metric = data_df[data_df['Transferability Metric'] == transf_metric]\n",
        "    for target_dataset in target_datasets:\n",
        "      data_df_metric_target = data_df_metric[data_df_metric['Target Dataset'] == target_dataset]\n",
        "      # Build the per-source lookups once per (metric, target) instead of\n",
        "      # re-filtering the table for every source of every combination.\n",
        "      # Keeping the first row per source matches taking .values[0] of the\n",
        "      # filtered rows.\n",
        "      first_rows = data_df_metric_target.drop_duplicates(subset=sampling_what, keep='first')\n",
        "      sources_in_table = first_rows[sampling_what].values\n",
        "      score_by_source = dict(zip(\n",
        "          sources_in_table, first_rows['Transferability Metric Score'].values))\n",
        "      performance_by_source = dict(zip(\n",
        "          sources_in_table, first_rows[actual_performance].values))\n",
        "      for i, source_set in enumerate(\n",
        "          tqdm.tqdm(source_combinations,\n",
        "                    desc=f'Calculating evaluation measures for {transf_metric} applied to the target dataset {target_dataset}.')):\n",
        "        # If present, filter case when source == target\n",
        "        source_set = [s for s in source_set if s != target_dataset]\n",
        "        # Get transferability scores for all sources in the source set\n",
        "        transferability_scores = [score_by_source[source] for source in source_set]\n",
        "        # Get actual performance for all sources in the source set\n",
        "        actual_performances = [performance_by_source[source] for source in source_set]\n",
        "        # Get evaluation measures for a particular target, metric and source set\n",
        "        evaluation_measures = get_evaluation_measure(transferability_scores, actual_performances, transf_metric, target_dataset, i)\n",
        "        evaluation_results.extend(evaluation_measures)\n",
        "\n",
        "  evaluation_results_df = pd.DataFrame(\n",
        "      evaluation_results,\n",
        "      columns=['Transferability Metric', 'Target Dataset', 'Evaluation Measure Score', 'Evaluation Measure', 'Sources ID'])\n",
        "\n",
        "  return evaluation_results_df"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "5-R7BdzNywFi"
      },
      "outputs": [],
      "source": [
        "# Naming Conversion for plotting\n",
        "\n",
        "# Maps internal identifiers (transferability metrics, datasets, evaluation\n",
        "# measures) to the labels shown in the plots.\n",
        "naming_conversion = {\n",
        "    'leep': 'LEEP',\n",
        "    # Raw string: the label contains a LaTeX backslash, which is an invalid\n",
        "    # escape sequence in a normal string literal.\n",
        "    'nleep': r'$\\mathcal{N}$LEEP',\n",
        "    'gbc': 'GBC',\n",
        "    'hscore': 'H-score',\n",
        "    'logme': 'LogME',\n",
        "    'cifar10': 'CIFAR10',\n",
        "    'cifar100': 'CIFAR100',\n",
        "    'imagenette': 'Imagenette',\n",
        "    'oxford_iiit_pet': 'Oxford Pets',\n",
        "    'caltech_birds2011': 'Caltech Birds',\n",
        "    'stanford_dogs': 'Stanford Dogs',\n",
        "    'oxford_flowers102': 'Oxford Flowers',\n",
        "    'sun397': 'SUN',\n",
        "    'dtd': 'DTD',\n",
        "    'w-kendall': r'$\\tau_w$',\n",
        "    'kendall': r'$\\tau$',\n",
        "    'pearson': r'$\\rho$',\n",
        "    'rel': r'$Rel@1$',\n",
        "    'pcontext': 'Pascal Context',\n",
        "    'ade': 'ADE20K',\n",
        "    'coco': 'COCO',\n",
        "    'kitti': 'KITTI',\n",
        "    'city': 'CityScapes',\n",
        "    'idd': 'IDD',\n",
        "    'bdd': 'BDD',\n",
        "    'mapillary': 'MVD',\n",
        "    'isprs': 'ISPRS',\n",
        "    'isaid': 'iSAID',\n",
        "    'sunrgbd': 'SUN RGB-D',\n",
        "    'scannet': 'ScanNet',\n",
        "    'suim': 'SUIM',\n",
        "    'vkitti2': 'vKITTI2',\n",
        "    'vgallery': 'vGallery',\n",
        "    'camvid': 'CamVid',\n",
        "    'pvoc': 'Pascal VOC'\n",
        "}"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "WMZVeJ9114R6"
      },
      "outputs": [],
      "source": [
        "def compare_metrics_pairs(data_df: pd.DataFrame, evaluation_measure: str='w-kendall'):\n",
        "  \"\"\"Qualitative analysis to compare metric pairs, method 4.2 in the paper.\n",
        "\n",
        "  For every pair of transferability metrics, draws one panel that scatters\n",
        "  the evaluation measure scores of the first metric (x axis) against those\n",
        "  of the second metric (y axis), one color per target dataset, together\n",
        "  with a dashed red identity line. The figure is a fixed 2 x 5 grid, so at\n",
        "  most 10 metric pairs can be shown.\n",
        "\n",
        "  Args:\n",
        "    data_df: Table with the columns 'Transferability Metric',\n",
        "      'Target Dataset', 'Evaluation Measure' and 'Evaluation Measure Score'.\n",
        "    evaluation_measure: Value of 'Evaluation Measure' to plot.\n",
        "  \"\"\"\n",
        "\n",
        "  # Filter chosen evaluation measure for plotting\n",
        "  data_df = data_df[data_df['Evaluation Measure'] == evaluation_measure]\n",
        "\n",
        "  # Extract color map for plotting: even tab20 entries first, then the odd\n",
        "  # ones, each half reversed.\n",
        "  cmap = plt.cm.tab20\n",
        "  colors = [cmap(i) for i in range(0, cmap.N, 2)][::-1] + [cmap(i) for i in range(1, cmap.N, 2)][::-1]\n",
        "\n",
        "  transferability_metrics = data_df['Transferability Metric'].unique()\n",
        "  target_datasets = data_df['Target Dataset'].unique()\n",
        "  transf_metrics_pairs = list(itertools.combinations(transferability_metrics, 2))\n",
        "\n",
        "  fig, ax = plt.subplots(2, 5, figsize=(15, 5))\n",
        "  plt.subplots_adjust(wspace=0.6, hspace=0.5)\n",
        "\n",
        "  for i, (metric_a, metric_b) in enumerate(transf_metrics_pairs):\n",
        "    # Loop over all possible transferability metric pairs\n",
        "    fig_i = i % 5\n",
        "    fig_j = i // 5\n",
        "    panel = ax[fig_j][fig_i]\n",
        "    panel.set_xlabel(naming_conversion[metric_a], fontsize=15)\n",
        "    panel.set_ylabel(naming_conversion[metric_b], fontsize=15)\n",
        "    for k, target in enumerate(target_datasets):\n",
        "      # Plot scatterplot of specific target datasets\n",
        "      target_data = data_df[data_df['Target Dataset'] == target]\n",
        "      x = target_data[target_data['Transferability Metric'] == metric_a]['Evaluation Measure Score'].values\n",
        "      y = target_data[target_data['Transferability Metric'] == metric_b]['Evaluation Measure Score'].values\n",
        "      # Use color= rather than c=: an RGBA tuple passed as c is ambiguous and\n",
        "      # is colormapped as data when there are exactly 4 points. The index\n",
        "      # cycles so that more targets than colors does not raise IndexError.\n",
        "      panel.scatter(x, y, s=0.3, color=colors[k % len(colors)])\n",
        "    x = data_df[data_df['Transferability Metric'] == metric_a]['Evaluation Measure Score'].values\n",
        "    y = data_df[data_df['Transferability Metric'] == metric_b]['Evaluation Measure Score'].values\n",
        "    if fig_i == 4 and fig_j == 1:  # Last plot\n",
        "      panel.legend([naming_conversion[t] for t in target_datasets], bbox_to_anchor=(0., -0.4), markerscale=14, fontsize=14, ncol=5)\n",
        "    # Identity line over the x range; y limits follow the range of metric_b.\n",
        "    panel.plot([min(x), max(x)], [min(x), max(x)], color='r', linestyle='dashed', linewidth=1.5)\n",
        "    panel.set_ylim([min(y), max(y)])\n",
        "\n",
        "  plt.show()"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "bZCC_--x11wM"
      },
      "outputs": [],
      "source": [
        "# Result files: CSV file names of the result tables read by load_results.\n",
        "\n",
        "classification_path = 'source_model_architectures_table.csv'\n",
        "semantic_seg_path = 'source_datasets_table.csv'\n",
        "\n",
        "def load_results(filename):\n",
        "  \"\"\"Reads a results CSV into a pandas DataFrame, using its first column as the index.\"\"\"\n",
        "  return pd.read_csv(filename, index_col=0)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "HGtIVSrV1YiY"
      },
      "outputs": [],
      "source": [
        "# Load semantic segmentation results\n",
        "\n",
        "segmentation_results_df = load_results(semantic_seg_path)\n",
        "\n",
        "# Subsample experiments\n",
        "# Each combination keeps int(0.9 * number of distinct source datasets) sources.\n",
        "# frozen_backbone is only consulted in the 'classification' scenario, so it\n",
        "# has no effect on this call.\n",
        "\n",
        "sampled_segmentation_results_df = evaluate_subsampled_experiments(\n",
        "    segmentation_results_df,\n",
        "    sampling_ratio=0.9,\n",
        "    scenario='semantic segmentation',\n",
        "    frozen_backbone=False)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "ZqAV0oyl1c3S"
      },
      "outputs": [],
      "source": [
        "# Qualitative analysis\n",
        "# Pairwise scatter plots of the transferability metrics for one evaluation\n",
        "# measure; the measure names produced by get_evaluation_measure are\n",
        "# 'w-kendall', 'kendall', 'pearson' and 'rel'.\n",
        "\n",
        "evaluation_measure = 'w-kendall'\n",
        "compare_metrics_pairs(sampled_segmentation_results_df, evaluation_measure)"
      ]
    },
    {
      "cell_type": "code",
      "execution_count": null,
      "metadata": {
        "id": "dnujX7XAVRKL"
      },
      "outputs": [],
      "source": [
        "# Quantitative analysis - Setup Stability Score varying one component at a time\n",
        "\n",
        "components = ['Target Dataset', 'Sources ID', 'Evaluation Measure']\n",
        "ss_score = {}\n",
        "\n",
        "for varying in components:\n",
        "  # Every component other than the varied one is held fixed.\n",
        "  fixing = [other for other in components if other != varying]\n",
        "  ss_score[varying] = setup_stability_score.get_setup_stability_score(\n",
        "      sampled_segmentation_results_df, varying=varying, fixing=fixing)\n",
        "\n",
        "ss_score"
      ]
    }
  ],
  "metadata": {
    "colab": {
      "collapsed_sections": [],
      "name": "results_analysis.ipynb",
      "provenance": [
        {
          "file_id": "12SrlmG3xmIR4JhSKM1buw-AtfWKx_dYv",
          "timestamp": 1657889621726
        }
      ],
      "source": [
        "Licensed under the Apache License, Version 2.0"
      ]
    },
    "kernelspec": {
      "display_name": "Python 3",
      "name": "python3"
    },
    "language_info": {
      "name": "python"
    }
  },
  "nbformat": 4,
  "nbformat_minor": 0
}
